*05_results_shortrun.do

*Close any log that is still open (no error if none is)
cap log close

*Project folders: every path used below is built from $root
global root    "/disk/bulkw/nencka/schooling_pandemic/2021_10_18_final/"
global input   "$root/Input"
global scripts "$root/Scripts"
global output  "$root/Output"
global temp    "$root/Temp"
global log     "$root/Log"
global figures "$root/Figures"

*Graph scheme used for any figures drawn in this session
set scheme plotplain



*****************************************************************************
*****************************************************************************
*****************************************************************************

*Start a fresh plain-text log for this script (overwrites the previous one)
log using "$log/05_results_shortrun", replace text


*Build a state-level lookup from the schooling laws file (from Claudia Goldin)

	use "$input/cg_cs_laws.dta", clear

	*Only the 1920 rows are used
	drop if year != 1920

	*Keep the state identifier and labage, which is renamed age_work_permit below
	keep stfip labage

	*Rename to match the census extracts (statefip is the merge key later on)
	rename stfip statefip
	rename labage age_work_permit

	sort statefip
	save "$temp/cg_cs_laws_1920.dta", replace

*Stack the 1920 and 1910 census extracts into one person-level file

*Same variable list is read from both extracts
local census_vars birth_order race sex statefip birthyr serial school mcd fbpl ///
	occscore_base occscore_f occ1950_base occ1950_f

use `census_vars' using "$temp/data1920_1918pandemic.dta", clear
gen census_year = 1920

*Appended 1910 rows arrive with census_year missing, which is how they are tagged
append using "$temp/data1910_1918pandemic.dta", keep(`census_vars')
replace census_year = 1910 if missing(census_year)

*Age at the census, derived from birth year; keep ages 0 through 25
cap drop age
gen age = census_year - birthyr
tab age, m
keep if inrange(age,0,25)
compress


*Birth order is only non-missing for children of the household head, so everyone else is put in a separate category (999)

tab birth_order, m
replace birth_order = 999 if mi(birth_order)


*Household ID that stays distinct across census years: serial plus a census-year suffix, then numbered
	tostring serial, replace
	replace serial = serial + "_20" if census_year == 1920
	replace serial = serial + "_10" if census_year == 1910

	egen long serial_c = group(serial)


*Outcome variable: 1 when school == 2, 0 when school == 1, missing for any other value

	gen in_school = (school == 2) if inlist(school, 1, 2)

cap drop _m
sort statefip


*Attach local school closure data and keep only towns with a non-missing closure length

 	sort statefip mcd

 	merge m:1 statefip mcd using "$temp/school_closures_towns_1920.dta"

	*List towns that appear only in the closure file
	tab mcd if _merge == 2, m

	*Rows without a closure length cannot be used
	drop if mi(days_closed)

	*Report match status, then discard closure-file-only rows and the merge indicator
	tab _merge
	keep if _merge != 2
	drop _merge


*Attach mortality data

	sort statefip mcd
	merge m:1 statefip mcd using "$temp/flu_mortality.dta"
	tab _merge

	*Discard rows that exist only in the mortality file
	keep if _merge != 2
	drop _merge

	*Standardize the excess death ratio, then cap it at its 1st and 99th percentiles
 	egen std_excess_death = std(excess_death_ratio)
	sum std_excess_death, d

	*Hold both cutoffs in temporary scalars so each replace uses the percentiles computed before any capping
	tempname cut_hi cut_lo
	scalar `cut_hi' = r(p99)
	scalar `cut_lo' = r(p1)

	replace std_excess_death = `cut_hi' if std_excess_death > `cut_hi' & !missing(std_excess_death)
	replace std_excess_death = `cut_lo' if std_excess_death < `cut_lo' & !missing(std_excess_death)


*Merge on local covariates from 1910
	sort statefip mcd

	*Drop leftovers one variable at a time. A single -drop _m school- aborts
	*without dropping anything when _m does not exist (it was already dropped
	*after the mortality merge), and -capture- hides that failure, so school
	*would stay in the data.
	cap drop _m
	cap drop school

	*Park the closure-file days_closed under a temporary name so the
	*days_closed carried by the covariate file comes in as its own column,
	*which is discarded right after the merge.
	rename days_closed days_closed_pre_m

	merge m:1 statefip mcd using "$temp/city_covariates.dta"
	tab _merge

	*Towns found only in the covariate file have no person records. Drop them,
	*as is done after the closure, mortality and law merges, so they are not
	*saved as empty rows in the analysis file.
	drop if _merge == 2
	drop _merge

	drop days_closed
	rename days_closed_pre_m days_closed


	*Numeric town identifier (state x mcd), used for clustering and fixed effects
	egen mcd_c = group(statefip mcd)

	gen age_at_census = age

	tab age_at_census census_year

	*Give missing father occupation codes their own category
	replace occ1950_f = 999999 if mi(occ1950_f)

	save "$temp/analysis_data_shortrun.dta", replace




use "$temp/analysis_data_shortrun.dta", clear

	su days_closed, de

	tab age_at_census census_year
	cap drop age_bin

	*Age groups at census: 1 = 0-5, 2 = 6-10, 3 = 11-14, 4 = 15-18, 5 = 19-21, 6 = 22-25
	*Ages outside these ranges are left missing
	gen age_bin = .
	local bin = 0
	foreach bounds in "0,5" "6,10" "11,14" "15,18" "19,21" "22,25" {
		local ++bin
		replace age_bin = `bin' if inrange(age_at_census,`bounds')
	}


	tab age_bin, mi

	*Census region indicators from state FIPS codes (codes listed in ascending order within each region)
	*NOTE(review): the groupings look like the four Census Bureau regions (1 Northeast, 2 Midwest, 3 South, 4 West) - confirm
	*States not listed are left with region missing

	gen region = .
	replace region = 1 if inlist(statefip, 9, 23, 25, 33, 34, 36, 42, 44, 50)
	replace region = 2 if inlist(statefip, 17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55)
	replace region = 3 if inlist(statefip, 1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 40, 45, 47, 48, 51, 54)
	replace region = 4 if inlist(statefip, 2, 4, 6, 8, 15, 16, 30, 32, 35, 41, 49, 53, 56)


	*Generate heterogeneity variables

		*Top occupation dad: 1 when occscore_f is above 25, 0 when it is 25 or below, missing when occscore_f is missing
		gen top_dad_occupation = (occscore_f > 25) if ~mi(occscore_f)

		*Assign birthplace (via https://usa.ipums.org/usa-action/variables/BPL#codes_section)
		*1 when fbpl lies strictly between 13000 and 90000, 0 for any other non-missing fbpl
		gen dad_foreign = (fbpl > 13000 & fbpl < 90000) if ~mi(fbpl)

		tab top_dad_occupation dad_foreign, mi

		*Race indicator: 1 for race codes 200 and 210, 0 for everything else (including missing race)
		capture drop black
		gen black = inlist(race,200,210)
		tab race black, m
	
	*Closure length expressed in units of three weeks (21 days)
	gen weeks_closed_3wks = days_closed/21
	su weeks_closed_3wks days_closed, de

	*Attach the 1920 state work-permit age; states found only in the law file are not kept
	sort statefip
	merge m:1 statefip using "$temp/cg_cs_laws_1920.dta", keep(master match) nogenerate

	*can_work is defined only for 1920 records in states with a non-missing permit age:
	*1 when age is at or above the permit age, 0 when below it
	gen can_work = (age >= age_work_permit) if ~mi(age_work_permit) & census_year == 1920

	tab age can_work if census_year == 1920, mi

	*Inverse hyperbolic sine of days closed
	gen ihs_days_closed = ln(days_closed + sqrt(days_closed^2 + 1))
*****************************************************************************
*****************************************************************************
*****************************************************************************


*Short-run specifications include every 1910 city characteristic interacted
*	with birth year, and are run with birth year, state-by-birth year,
*	and region-by-birth year fixed effects

*Quick checks of the variables that enter the control sets
tab race, m
tab mcd_c, m
desc, fullnames

*Individual-level controls passed to absorb()
local ind_covs "race##i.birthyr race##sex##mcd_c"

*City-level controls: each 1910 city characteristic interacted with birth year
local city_by_covs ""
foreach cityvar in in_school_6_10_avg in_school_11_14_avg in_school_15_18_avg occscore_base_avg foreignb_avg count {
	local city_by_covs `city_by_covs' c.`cityvar'##i.birthyr
}



*With Region-Year fixed effects (region##birthyr)
	*No controls for mortality: one regression per census year, estimates saved with parmest
	foreach yr in 1910 1920 {
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if census_year == `yr', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_`yr'_rby", replace)
	}

	*With mort controls: adds age-bin interactions with standardized excess deaths
	foreach yr in 1910 1920 {
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks ib1.age_bin##c.std_excess_death if census_year == `yr', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_`yr'_m_rby", replace)
	}

*With SBY fixed effects (statefip##birthyr)
	*Same baseline specification, run once per census year
	foreach yr in 1910 1920 {
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if census_year == `yr', absorb(statefip##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_`yr'_sby", replace)
	}


*State clustered errors
	*Region-by-birth-year specification with standard errors clustered on statefip instead of mcd_c
	foreach yr in 1910 1920 {
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if census_year == `yr', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(statefip)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_`yr'_statec", replace)
	}

*Robustness check: check results only for cities that had info on other NPIs

	*Estimation sample shared by both regressions: 1920 records with non-missing net NPI days
	local npi_sample "census_year == 1920 & ~mi(days_npi_net_schoolclosures)"

	*Baseline using limited-city-sample
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if `npi_sample', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_1920_rby_npisample", replace)

	*Conditioning on net NPIs (All NPI Days - Days Closed)
		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks ib1.age_bin##c.days_npi_net_schoolclosures if `npi_sample', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/baseline_1920_rby_othernpi", replace)


*Baseline regressions specification checks

		*No zeros: restrict to records with days_closed above zero
		foreach yr in 1910 1920 {
			reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if census_year == `yr' & days_closed > 0, absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
			parmest, format(estimate min95 max95) level(95) saving("$temp/nozero_`yr'_rby", replace)
		}

		*IHS: closure length enters as ihs_days_closed (1920 is run first, then 1910)
		foreach yr in 1920 1910 {
			reghdfe in_school ib1.age_bin##c.ihs_days_closed if census_year == `yr', absorb(region##birthyr `ind_covs' `city_by_covs')  cluster(mcd_c)
			parmest, format(estimate min95 max95) level(95) saving("$temp/ihs_`yr'", replace)
		}
	

	*Baseline regressions with control for whether you can work

		*Fixed effects shared by the two 1920 regressions below
		local fe_rby "region##birthyr `ind_covs' `city_by_covs'"

		reghdfe in_school ib1.age_bin##c.weeks_closed_3wks can_work if census_year == 1920, absorb(`fe_rby')  cluster(mcd_c)
		parmest, format(estimate min95 max95) level(95) saving("$temp/workc_1920_rby", replace)


	*Drop older kids who can't work (age 14 or above with can_work equal to 0)
	*This removes the rows from the data in memory, so it is done last
	tab age can_work
	keep if !(can_work == 0 & age >= 14)
	reghdfe in_school ib1.age_bin##c.weeks_closed_3wks if census_year == 1920, absorb(`fe_rby')  cluster(mcd_c) timeit
		parmest, format(estimate min95 max95) level(95) saving("$temp/canwork_1920_rby", replace)



*Close the log opened at the top of the script
log close

